In [1]:
def z_algorithm(string):
# z[0] = 0 (by convention)
# z[i] = the longest prefix of string[0:n) and string[i:n) for 1 <= i < n
# Complexity: O(n)
l, r, n = 0, 0, len(string)
z = [0 for i in range(n)]
for i in range(1, n):
if i <= r:
z[i] = min(r - i + 1, z[i - l])
while i + z[i] < n and string[z[i]] == string[i + z[i]]:
z[i] += 1
if i + z[i] - 1 > r:
l = i
r = i + z[i] - 1
return z
In [2]:
def string_matching(pattern, text):
p, t = len(pattern), len(text)
z = z_algorithm(pattern + '#' + text)
matches = []
for i in range(p + 1, p + t + 1):
if z[i] == p:
matches.append(i - p - 1)
return matches
In [3]:
print(string_matching('ABA', 'CABBCABABAB'))
In [4]:
print(z_algorithm('abababababababababbabababa'))
In [6]:
print(z_algorithm('ababa'))
In [14]:
with open('raven.txt', 'r') as file:
raven = file.read()
In [15]:
print(string_matching('the', raven))
In [16]:
print(string_matching('our', raven))